| \n", " | Index | \n", "Peptide | \n", "pep count | \n", "Dataset % | \n", "
|---|---|---|---|---|
| 0 | \n", "1 | \n", "YYTHWIYTVNPRRC | \n", "14434 | \n", "22.391486 | \n", "
| 1 | \n", "2 | \n", "YFTYWYHTNNTGRC | \n", "10900 | \n", "16.909187 | \n", "
| 2 | \n", "3 | \n", "YTSHWYITRVTARC | \n", "4980 | \n", "7.725482 | \n", "
| 3 | \n", "4 | \n", "YYTFWYYTNVRRRC | \n", "4307 | \n", "6.681456 | \n", "
| 4 | \n", "5 | \n", "YTWELPSWYLEHYFPTC | \n", "2021 | \n", "3.135180 | \n", "
| 5 | \n", "6 | \n", "YFAHWLVTYNPARC | \n", "1588 | \n", "2.463467 | \n", "
| 6 | \n", "7 | \n", "YIYYVNyTTLTyLFTDC | \n", "1316 | \n", "2.041513 | \n", "
| 7 | \n", "8 | \n", "YETHWFVTYRAELC | \n", "1227 | \n", "1.903447 | \n", "
| 8 | \n", "9 | \n", "YPTWYTLHYYPLFEC | \n", "1050 | \n", "1.628867 | \n", "
| 9 | \n", "10 | \n", "YYTFWYFTSEAHKC | \n", "916 | \n", "1.420992 | \n", "
| 10 | \n", "11 | \n", "YISHWYVTNVARTC | \n", "912 | \n", "1.414787 | \n", "
| 11 | \n", "12 | \n", "YTTFWYVTRRPTAC | \n", "874 | \n", "1.355838 | \n", "
| 12 | \n", "13 | \n", "YTAYWYTTSNTNRC | \n", "843 | \n", "1.307747 | \n", "
| 13 | \n", "14 | \n", "YTTHWYDTDNKSRC | \n", "749 | \n", "1.161925 | \n", "
| 14 | \n", "15 | \n", "YHTYWLDTYRPLVC | \n", "671 | \n", "1.040923 | \n", "
| 15 | \n", "16 | \n", "YLEWSLDyyELTYyRSC | \n", "649 | \n", "1.006795 | \n", "
| 16 | \n", "17 | \n", "YHTYWYSTTKTRHC | \n", "641 | \n", "0.994384 | \n", "
| 17 | \n", "18 | \n", "YGPQWYRRyYLyC | \n", "528 | \n", "0.819087 | \n", "
| 18 | \n", "19 | \n", "YLLNyRGHATQQVFWWC | \n", "510 | \n", "0.791164 | \n", "
| 19 | \n", "20 | \n", "YVIyKGVRNySWHASWC | \n", "468 | \n", "0.726009 | \n", "
| 20 | \n", "21 | \n", "YRYyTSTYEySWNC | \n", "430 | \n", "0.667060 | \n", "
| 21 | \n", "22 | \n", "YLIFTWRTKRDCIIAyC | \n", "369 | \n", "0.572430 | \n", "
| 22 | \n", "23 | \n", "YyGWHCPLKHFRTC | \n", "342 | \n", "0.530545 | \n", "
| 23 | \n", "24 | \n", "YyTHPTTTySyYFFNC | \n", "244 | \n", "0.378518 | \n", "
| 24 | \n", "25 | \n", "YyTYWSyLTQLYHyTSC | \n", "225 | \n", "0.349043 | \n", "
| \n", " | Index | \n", "Peptide | \n", "pep count | \n", "Dataset % | \n", "
|---|---|---|---|---|
| 0 | \n", "1 | \n", "YYTHWIYTVNPRRCSALSA | \n", "14226 | \n", "9.296520 | \n", "
| 1 | \n", "2 | \n", "nan | \n", "13679 | \n", "8.939062 | \n", "
| 2 | \n", "3 | \n", "YPKWYLKyYTNFPRYLHRTGGG_ | \n", "12061 | \n", "7.881719 | \n", "
| 3 | \n", "4 | \n", "YFTYWYHTNNTGRCSALSA | \n", "10782 | \n", "7.045908 | \n", "
| 4 | \n", "5 | \n", "YTSHWYITRVTARCSALSA | \n", "4923 | \n", "3.217121 | \n", "
| 5 | \n", "6 | \n", "YTyyCySyyFNWYPRFPRYLHRTGGG_ | \n", "4409 | \n", "2.881229 | \n", "
| 6 | \n", "7 | \n", "YYTFWYYTNVRRRCSALSA | \n", "4233 | \n", "2.766215 | \n", "
| 7 | \n", "8 | \n", "YRAyCWYyySRyyWFPRYLHRTGGG_ | \n", "2387 | \n", "1.559876 | \n", "
| 8 | \n", "9 | \n", "YPSWYTYWYyRSFPRYLHRTGGG_ | \n", "2089 | \n", "1.365136 | \n", "
| 9 | \n", "10 | \n", "YTWELPSWYLEHYFPTCSALSA | \n", "1976 | \n", "1.291292 | \n", "
| 10 | \n", "11 | \n", "YFAHWLVTYNPARCSALSA | \n", "1566 | \n", "1.023362 | \n", "
| 11 | \n", "12 | \n", "YIHYyCyyyTyySFPRYLHRTGGG_ | \n", "1374 | \n", "0.897893 | \n", "
| 12 | \n", "13 | \n", "YIYYVNyTTLTyLFTDCSALSA | \n", "1285 | \n", "0.839732 | \n", "
| 13 | \n", "14 | \n", "YETHWFVTYRAELCSALSA | \n", "1212 | \n", "0.792027 | \n", "
| 14 | \n", "15 | \n", "YPTWYTLHYYPLFECSALSA | \n", "1028 | \n", "0.671786 | \n", "
| 15 | \n", "16 | \n", "YHySLSAYyKAWSESFPRYLHRTGGG_ | \n", "960 | \n", "0.627348 | \n", "
| 16 | \n", "17 | \n", "YYTFWYFTSEAHKCSALSA | \n", "904 | \n", "0.590753 | \n", "
| 17 | \n", "18 | \n", "YISHWYVTNVARTCSALSA | \n", "890 | \n", "0.581604 | \n", "
| 18 | \n", "19 | \n", "YTTFWYVTRRPTACSALSA | \n", "868 | \n", "0.567228 | \n", "
| 19 | \n", "20 | \n", "YTAYWYTTSNTNRCSALSA | \n", "823 | \n", "0.537821 | \n", "
| 20 | \n", "21 | \n", "YyTyCFFVyyFyyyHFPRYLHRTGGG_ | \n", "814 | \n", "0.531939 | \n", "
| 21 | \n", "22 | \n", "YFCYTLYyAyyTAyFPRYLHRTGGG_ | \n", "784 | \n", "0.512335 | \n", "
| 22 | \n", "23 | \n", "YHyTTyYCDyWLyWTRFPRYLHRTGGG_ | \n", "776 | \n", "0.507107 | \n", "
| 23 | \n", "24 | \n", "YTTHWYDTDNKSRCSALSA | \n", "750 | \n", "0.490116 | \n", "
| 24 | \n", "25 | \n", "YHTYWLDTYRPLVCSALSA | \n", "658 | \n", "0.429995 | \n", "
| \n", " | Unnamed: 0 | \n", "elapsed time, s | \n", "DCCL22_r7 | \n", "
|---|---|---|---|
| 0 | \n", "nan | \n", "nan | \n", "0 | \n", "
| 1 | \n", "fetch_dir_fastq | \n", "1.7 | \n", "153025 | \n", "
| 2 | \n", "trim_exp | \n", "2.6 | \n", "153025 | \n", "
| 3 | \n", "translate_dna | \n", "5.3 | \n", "153025 | \n", "
| 4 | \n", "length_summary | \n", "1.9 | \n", "153025 | \n", "
| 5 | \n", "length_summary | \n", "3.5 | \n", "153025 | \n", "
| 6 | \n", "q_score_summary | \n", "5.5 | \n", "153025 | \n", "
| 7 | \n", "fastq_count_summary | \n", "0.4 | \n", "153025 | \n", "
| 8 | \n", "length_filter | \n", "0.7 | \n", "68564 | \n", "
| 9 | \n", "constant_region_filter | \n", "0.2 | \n", "65769 | \n", "
| 10 | \n", "variable_region_filter | \n", "2.0 | \n", "64921 | \n", "
| 11 | \n", "q_score_filter | \n", "0.3 | \n", "64462 | \n", "
| 12 | \n", "fetch_region | \n", "0.1 | \n", "64462 | \n", "
| 13 | \n", "filter_ambiguous | \n", "2.3 | \n", "64462 | \n", "
| 14 | \n", "sequence_level_convergence_summary | \n", "3.4 | \n", "64462 | \n", "
| 15 | \n", "token_level_convergence_analysis | \n", "20.8 | \n", "64462 | \n", "
| 16 | \n", "token_level_convergence_analysis | \n", "48.9 | \n", "64462 | \n", "
| 17 | \n", "unpad_data | \n", "2.1 | \n", "64462 | \n", "
| 18 | \n", "save_data | \n", "0.1 | \n", "64462 | \n", "
| 19 | \n", "fastq_count_summary | \n", "1.1 | \n", "64462 | \n", "
| 20 | \n", "fastq_count_summary | \n", "1.0 | \n", "64462 | \n", "
| 21 | \n", "fastq_count_summary | \n", "0.5 | \n", "64462 | \n", "
| 22 | \n", "fastq_count_summary | \n", "0.4 | \n", "64462 | \n", "
| 23 | \n", "library_design_summary | \n", "0.0 | \n", "64462 | \n", "
| 24 | \n", "joint_dataset_count_summary | \n", "2.1 | \n", "64462 | \n", "
| 25 | \n", "umap_hdbscan_summary | \n", "560.0 | \n", "64462 | \n", "